Load the dataset into pandas¶

In [17]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

Display the first few rows¶

In [18]:
# Read the 2015 World Happiness Report CSV and preview its first five rows.
# NOTE(review): the path is absolute and machine-specific; adjust it to your local copy.
df15 = pd.read_csv(
    filepath_or_buffer='C:/Users/san/OneDrive/Desktop/WorldHappinessData/2015.csv',
)
df15.head(n=5)
Out[18]:
Country Region Happiness Rank Happiness Score Standard Error Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual
0 Switzerland Western Europe 1 7.587 0.03411 1.39651 1.34951 0.94143 0.66557 0.41978 0.29678 2.51738
1 Iceland Western Europe 2 7.561 0.04884 1.30232 1.40223 0.94784 0.62877 0.14145 0.43630 2.70201
2 Denmark Western Europe 3 7.527 0.03328 1.32548 1.36058 0.87464 0.64938 0.48357 0.34139 2.49204
3 Norway Western Europe 4 7.522 0.03880 1.45900 1.33095 0.88521 0.66973 0.36503 0.34699 2.46531
4 Canada North America 5 7.427 0.03553 1.32629 1.32261 0.90563 0.63297 0.32957 0.45811 2.45176
In [11]:
# Read the 2016 World Happiness Report CSV and preview its first five rows.
# NOTE(review): the path is absolute and machine-specific; adjust it to your local copy.
df16 = pd.read_csv(
    filepath_or_buffer='C:/Users/san/OneDrive/Desktop/WorldHappinessData/2016.csv',
)
df16.head(n=5)
Out[11]:
Country Region Happiness Rank Happiness Score Lower Confidence Interval Upper Confidence Interval Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual
0 Denmark Western Europe 1 7.526 7.460 7.592 1.44178 1.16374 0.79504 0.57941 0.44453 0.36171 2.73939
1 Switzerland Western Europe 2 7.509 7.428 7.590 1.52733 1.14524 0.86303 0.58557 0.41203 0.28083 2.69463
2 Iceland Western Europe 3 7.501 7.333 7.669 1.42666 1.18326 0.86733 0.56624 0.14975 0.47678 2.83137
3 Norway Western Europe 4 7.498 7.421 7.575 1.57744 1.12690 0.79579 0.59609 0.35776 0.37895 2.66465
4 Finland Western Europe 5 7.413 7.351 7.475 1.40598 1.13464 0.81091 0.57104 0.41004 0.25492 2.82596
In [12]:
# Read the 2017 World Happiness Report CSV and preview its first five rows.
# NOTE(review): the path is absolute and machine-specific; adjust it to your local copy.
df17 = pd.read_csv(
    filepath_or_buffer='C:/Users/san/OneDrive/Desktop/WorldHappinessData/2017.csv',
)
df17.head(n=5)
Out[12]:
Region Country Happiness.Rank Happiness.Score Whisker.high Whisker.low Economy..GDP.per.Capita. Family Health..Life.Expectancy. Freedom Generosity Trust..Government.Corruption. Dystopia.Residual
0 Western Europe Norway 1 7.537 7.594445 7.479556 1.616463 1.533524 0.796667 0.635423 0.362012 0.315964 2.277027
1 Western Europe Denmark 2 7.522 7.581728 7.462272 1.482383 1.551122 0.792566 0.626007 0.355280 0.400770 2.313707
2 Western Europe Iceland 3 7.504 7.622030 7.385970 1.480633 1.610574 0.833552 0.627163 0.475540 0.153527 2.322715
3 Western Europe Switzerland 4 7.494 7.561772 7.426227 1.564980 1.516912 0.858131 0.620071 0.290549 0.367007 2.276716
4 Western Europe Finland 5 7.469 7.527542 7.410458 1.443572 1.540247 0.809158 0.617951 0.245483 0.382612 2.430182
In [13]:
# Read the 2018 World Happiness Report CSV and preview its first five rows.
# NOTE(review): the path is absolute and machine-specific; adjust it to your local copy.
df18 = pd.read_csv(
    filepath_or_buffer='C:/Users/san/OneDrive/Desktop/WorldHappinessData/2018.csv',
)
df18.head(n=5)
Out[13]:
Overall rank Country or region Score GDP per capita Social support Healthy life expectancy Freedom to make life choices Generosity Perceptions of corruption
0 1 Finland 7.632 1.305 1.592 0.874 0.681 0.202 0.393
1 2 Norway 7.594 1.456 1.582 0.861 0.686 0.286 0.340
2 3 Denmark 7.555 1.351 1.590 0.868 0.683 0.284 0.408
3 4 Iceland 7.495 1.343 1.644 0.914 0.677 0.353 0.138
4 5 Switzerland 7.487 1.420 1.549 0.927 0.660 0.256 0.357
In [14]:
# Read the 2019 World Happiness Report CSV and preview its first five rows.
# NOTE(review): the path is absolute and machine-specific; adjust it to your local copy.
df19 = pd.read_csv(
    filepath_or_buffer='C:/Users/san/OneDrive/Desktop/WorldHappinessData/2019.csv',
)
df19.head(n=5)
Out[14]:
Overall rank Country or region Score GDP per capita Social support Healthy life expectancy Freedom to make life choices Generosity Perceptions of corruption
0 1 Finland 7.769 1.340 1.587 0.986 0.596 0.153 0.393
1 2 Denmark 7.600 1.383 1.573 0.996 0.592 0.252 0.410
2 3 Norway 7.554 1.488 1.582 1.028 0.603 0.271 0.341
3 4 Iceland 7.494 1.380 1.624 1.026 0.591 0.354 0.118
4 5 Netherlands 7.488 1.396 1.522 0.999 0.557 0.322 0.298

The data types and summary statistics¶

In [64]:
# Print column names, non-null counts and dtypes for the 2015 data.
df15.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 12 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country                        158 non-null    object 
 1   Region                         158 non-null    object 
 2   Happiness Rank                 158 non-null    int64  
 3   Happiness Score                158 non-null    float64
 4   Standard Error                 158 non-null    float64
 5   Economy (GDP per Capita)       158 non-null    float64
 6   Family                         158 non-null    float64
 7   Health (Life Expectancy)       158 non-null    float64
 8   Freedom                        158 non-null    float64
 9   Trust (Government Corruption)  158 non-null    float64
 10  Generosity                     158 non-null    float64
 11  Dystopia Residual              158 non-null    float64
dtypes: float64(9), int64(1), object(2)
memory usage: 14.9+ KB
In [23]:
# Print column names, non-null counts and dtypes for the 2016 data.
df16.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country                        157 non-null    object 
 1   Region                         157 non-null    object 
 2   Happiness Rank                 157 non-null    int64  
 3   Happiness Score                157 non-null    float64
 4   Lower Confidence Interval      157 non-null    float64
 5   Upper Confidence Interval      157 non-null    float64
 6   Economy (GDP per Capita)       157 non-null    float64
 7   Family                         157 non-null    float64
 8   Health (Life Expectancy)       157 non-null    float64
 9   Freedom                        157 non-null    float64
 10  Trust (Government Corruption)  157 non-null    float64
 11  Generosity                     157 non-null    float64
 12  Dystopia Residual              157 non-null    float64
dtypes: float64(10), int64(1), object(2)
memory usage: 16.1+ KB
In [24]:
# Print column names, non-null counts and dtypes for the 2017 data.
# NOTE(review): the saved output below lists 12 columns and no 'Region', while the
# head() and isnull() outputs for df17 do include 'Region' — re-run to refresh it.
df17.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 12 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country                        155 non-null    object 
 1   Happiness.Rank                 155 non-null    int64  
 2   Happiness.Score                155 non-null    float64
 3   Whisker.high                   155 non-null    float64
 4   Whisker.low                    155 non-null    float64
 5   Economy..GDP.per.Capita.       155 non-null    float64
 6   Family                         155 non-null    float64
 7   Health..Life.Expectancy.       155 non-null    float64
 8   Freedom                        155 non-null    float64
 9   Generosity                     155 non-null    float64
 10  Trust..Government.Corruption.  155 non-null    float64
 11  Dystopia.Residual              155 non-null    float64
dtypes: float64(10), int64(1), object(1)
memory usage: 14.7+ KB
In [25]:
# Print column names, non-null counts and dtypes for the 2018 data.
df18.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Overall rank                  156 non-null    int64  
 1   Country or region             156 non-null    object 
 2   Score                         156 non-null    float64
 3   GDP per capita                156 non-null    float64
 4   Social support                156 non-null    float64
 5   Healthy life expectancy       156 non-null    float64
 6   Freedom to make life choices  156 non-null    float64
 7   Generosity                    156 non-null    float64
 8   Perceptions of corruption     155 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 11.1+ KB
In [26]:
# Print column names, non-null counts and dtypes for the 2019 data.
df19.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Overall rank                  156 non-null    int64  
 1   Country or region             156 non-null    object 
 2   Score                         156 non-null    float64
 3   GDP per capita                156 non-null    float64
 4   Social support                156 non-null    float64
 5   Healthy life expectancy       156 non-null    float64
 6   Freedom to make life choices  156 non-null    float64
 7   Generosity                    156 non-null    float64
 8   Perceptions of corruption     156 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 11.1+ KB
In [27]:
# Summary statistics for the numeric columns of the 2015 data.
# NOTE(review): the saved output below shows the 2018/2019-style columns
# ("Overall rank", "Score", ...) with count 156, not df15's columns — re-run to refresh it.
df15.describe()
Out[27]:
Overall rank Score GDP per capita Social support Healthy life expectancy Freedom to make life choices Generosity Perceptions of corruption
count 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000
mean 78.500000 5.407096 0.905147 1.208814 0.725244 0.392571 0.184846 0.110603
std 45.177428 1.113120 0.398389 0.299191 0.242124 0.143289 0.095254 0.094538
min 1.000000 2.853000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 39.750000 4.544500 0.602750 1.055750 0.547750 0.308000 0.108750 0.047000
50% 78.500000 5.379500 0.960000 1.271500 0.789000 0.417000 0.177500 0.085500
75% 117.250000 6.184500 1.232500 1.452500 0.881750 0.507250 0.248250 0.141250
max 156.000000 7.769000 1.684000 1.624000 1.141000 0.631000 0.566000 0.453000
In [28]:
# Summary statistics for the numeric columns of the 2016 data.
df16.describe()
Out[28]:
Happiness Rank Happiness Score Lower Confidence Interval Upper Confidence Interval Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual
count 157.000000 157.000000 157.000000 157.000000 157.000000 157.000000 157.000000 157.000000 157.000000 157.000000 157.000000
mean 78.980892 5.382185 5.282395 5.481975 0.953880 0.793621 0.557619 0.370994 0.137624 0.242635 2.325807
std 45.466030 1.141674 1.148043 1.136493 0.412595 0.266706 0.229349 0.145507 0.111038 0.133756 0.542220
min 1.000000 2.905000 2.732000 3.078000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.817890
25% 40.000000 4.404000 4.327000 4.465000 0.670240 0.641840 0.382910 0.257480 0.061260 0.154570 2.031710
50% 79.000000 5.314000 5.237000 5.419000 1.027800 0.841420 0.596590 0.397470 0.105470 0.222450 2.290740
75% 118.000000 6.269000 6.154000 6.434000 1.279640 1.021520 0.729930 0.484530 0.175540 0.311850 2.664650
max 157.000000 7.526000 7.460000 7.669000 1.824270 1.183260 0.952770 0.608480 0.505210 0.819710 3.837720
In [95]:
# Summary statistics for the numeric columns of the 2017 data.
df17.describe()
Out[95]:
Happiness.Rank Happiness.Score Whisker.high Whisker.low Economy..GDP.per.Capita. Family Health..Life.Expectancy. Freedom Generosity Trust..Government.Corruption. Dystopia.Residual
count 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000 155.000000
mean 78.000000 5.354019 5.452326 5.255713 0.984718 1.188898 0.551341 0.408786 0.246883 0.123120 1.850238
std 44.888751 1.131230 1.118542 1.145030 0.420793 0.287263 0.237073 0.149997 0.134780 0.101661 0.500028
min 1.000000 2.693000 2.864884 2.521116 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.377914
25% 39.500000 4.505500 4.608172 4.374955 0.663371 1.042635 0.369866 0.303677 0.154106 0.057271 1.591291
50% 78.000000 5.279000 5.370032 5.193152 1.064578 1.253918 0.606042 0.437454 0.231538 0.089848 1.832910
75% 116.500000 6.101500 6.194600 6.006527 1.318027 1.414316 0.723008 0.516561 0.323762 0.153296 2.144654
max 155.000000 7.537000 7.622030 7.479556 1.870766 1.610574 0.949492 0.658249 0.838075 0.464308 3.117485
In [30]:
# Summary statistics for the numeric columns of the 2018 data.
df18.describe()
Out[30]:
Overall rank Score GDP per capita Social support Healthy life expectancy Freedom to make life choices Generosity Perceptions of corruption
count 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000 155.000000
mean 78.500000 5.375917 0.891449 1.213237 0.597346 0.454506 0.181006 0.112000
std 45.177428 1.119506 0.391921 0.302372 0.247579 0.162424 0.098471 0.096492
min 1.000000 2.905000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 39.750000 4.453750 0.616250 1.066750 0.422250 0.356000 0.109500 0.051000
50% 78.500000 5.378000 0.949500 1.255000 0.644000 0.487000 0.174000 0.082000
75% 117.250000 6.168500 1.197750 1.463000 0.777250 0.578500 0.239000 0.137000
max 156.000000 7.632000 2.096000 1.644000 1.030000 0.724000 0.598000 0.457000
In [33]:
# Summary statistics for the numeric columns of the 2019 data.
df19.describe()
Out[33]:
Overall rank Score GDP per capita Social support Healthy life expectancy Freedom to make life choices Generosity Perceptions of corruption
count 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000 156.000000
mean 78.500000 5.407096 0.905147 1.208814 0.725244 0.392571 0.184846 0.110603
std 45.177428 1.113120 0.398389 0.299191 0.242124 0.143289 0.095254 0.094538
min 1.000000 2.853000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 39.750000 4.544500 0.602750 1.055750 0.547750 0.308000 0.108750 0.047000
50% 78.500000 5.379500 0.960000 1.271500 0.789000 0.417000 0.177500 0.085500
75% 117.250000 6.184500 1.232500 1.452500 0.881750 0.507250 0.248250 0.141250
max 156.000000 7.769000 1.684000 1.624000 1.141000 0.631000 0.566000 0.453000

Null and duplicate values¶

In [65]:
# Number of missing values in each column of the 2015 data.
df15.isna().sum()
Out[65]:
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Standard Error                   0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
dtype: int64
In [83]:
# Number of fully duplicated rows in the 2015 data.
df15.duplicated().sum()
Out[83]:
0
In [36]:
# Number of missing values in each column of the 2016 data.
df16.isna().sum()
Out[36]:
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Lower Confidence Interval        0
Upper Confidence Interval        0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
dtype: int64
In [84]:
# Number of fully duplicated rows in the 2016 data.
df16.duplicated().sum()
Out[84]:
0
In [94]:
# Number of missing values in each column of the 2017 data.
df17.isna().sum()
Out[94]:
Region                           35
Country                           0
Happiness.Rank                    0
Happiness.Score                   0
Whisker.high                      0
Whisker.low                       0
Economy..GDP.per.Capita.          0
Family                            0
Health..Life.Expectancy.          0
Freedom                           0
Generosity                        0
Trust..Government.Corruption.     0
Dystopia.Residual                 0
dtype: int64
In [96]:
# Number of fully duplicated rows in the 2017 data.
df17.duplicated().sum()
Out[96]:
0
In [39]:
# Number of missing values in each column of the 2018 data.
df18.isna().sum()
Out[39]:
Overall rank                    0
Country or region               0
Score                           0
GDP per capita                  0
Social support                  0
Healthy life expectancy         0
Freedom to make life choices    0
Generosity                      0
Perceptions of corruption       1
dtype: int64
In [44]:
# Fill the single missing 'Perceptions of corruption' value with 0.
# The column is float64 and its missing entry is NaN, so replacing the string
# 'N/A' never matched anything; fillna() targets the actual missing value.
# NOTE(review): 0 is kept as the fill value because that was the original intent;
# confirm it is preferable to e.g. the column median.
df18['Perceptions of corruption'] = df18['Perceptions of corruption'].fillna(0)
In [86]:
# Number of fully duplicated rows in the 2018 data.
df18.duplicated().sum()
Out[86]:
0
In [40]:
# Number of missing values in each column of the 2019 data.
df19.isna().sum()
Out[40]:
Overall rank                    0
Country or region               0
Score                           0
GDP per capita                  0
Social support                  0
Healthy life expectancy         0
Freedom to make life choices    0
Generosity                      0
Perceptions of corruption       0
dtype: int64
In [81]:
# Number of fully duplicated rows in the 2019 data.
df19.duplicated().sum()
Out[81]:
0

Histogram of Happiness Scores and features¶

In [154]:
# Histograms (15 bins each) of every numeric column in the 2015 data, laid out on a 2x5 grid.
num_features = df15.select_dtypes(include='number').columns
df15.loc[:, num_features].hist(layout=(2, 5), figsize=(15, 6), bins=15)
Out[154]:
array([[<Axes: title={'center': 'Happiness Rank'}>,
        <Axes: title={'center': 'Happiness Score'}>,
        <Axes: title={'center': 'Standard Error'}>,
        <Axes: title={'center': 'Economy (GDP per Capita)'}>,
        <Axes: title={'center': 'Family'}>],
       [<Axes: title={'center': 'Health (Life Expectancy)'}>,
        <Axes: title={'center': 'Freedom'}>,
        <Axes: title={'center': 'Trust (Government Corruption)'}>,
        <Axes: title={'center': 'Generosity'}>,
        <Axes: title={'center': 'Dystopia Residual'}>]], dtype=object)
No description has been provided for this image
In [52]:
# Histograms (15 bins each) of every numeric column in the 2016 data, laid out on a 3x4 grid.
num_features16 = df16.select_dtypes(include='number').columns
df16.loc[:, num_features16].hist(layout=(3, 4), figsize=(15, 6), bins=15)
Out[52]:
array([[<Axes: title={'center': 'Happiness Rank'}>,
        <Axes: title={'center': 'Happiness Score'}>,
        <Axes: title={'center': 'Lower Confidence Interval'}>,
        <Axes: title={'center': 'Upper Confidence Interval'}>],
       [<Axes: title={'center': 'Economy (GDP per Capita)'}>,
        <Axes: title={'center': 'Family'}>,
        <Axes: title={'center': 'Health (Life Expectancy)'}>,
        <Axes: title={'center': 'Freedom'}>],
       [<Axes: title={'center': 'Trust (Government Corruption)'}>,
        <Axes: title={'center': 'Generosity'}>,
        <Axes: title={'center': 'Dystopia Residual'}>, <Axes: >]],
      dtype=object)
No description has been provided for this image
In [56]:
# Histograms (15 bins each) of every numeric column in the 2017 data, laid out on a 3x4 grid.
num_features17 = df17.select_dtypes(include='number').columns
df17.loc[:, num_features17].hist(layout=(3, 4), figsize=(15, 6), bins=15)
Out[56]:
array([[<Axes: title={'center': 'Happiness.Rank'}>,
        <Axes: title={'center': 'Happiness.Score'}>,
        <Axes: title={'center': 'Whisker.high'}>,
        <Axes: title={'center': 'Whisker.low'}>],
       [<Axes: title={'center': 'Economy..GDP.per.Capita.'}>,
        <Axes: title={'center': 'Family'}>,
        <Axes: title={'center': 'Health..Life.Expectancy.'}>,
        <Axes: title={'center': 'Freedom'}>],
       [<Axes: title={'center': 'Generosity'}>,
        <Axes: title={'center': 'Trust..Government.Corruption.'}>,
        <Axes: title={'center': 'Dystopia.Residual'}>, <Axes: >]],
      dtype=object)
No description has been provided for this image
In [58]:
# Histograms (15 bins each) of every numeric column in the 2018 data, laid out on a 2x4 grid.
num_features18 = df18.select_dtypes(include='number').columns
df18.loc[:, num_features18].hist(layout=(2, 4), figsize=(15, 6), bins=15)
Out[58]:
array([[<Axes: title={'center': 'Overall rank'}>,
        <Axes: title={'center': 'Score'}>,
        <Axes: title={'center': 'GDP per capita'}>,
        <Axes: title={'center': 'Social support'}>],
       [<Axes: title={'center': 'Healthy life expectancy'}>,
        <Axes: title={'center': 'Freedom to make life choices'}>,
        <Axes: title={'center': 'Generosity'}>,
        <Axes: title={'center': 'Perceptions of corruption'}>]],
      dtype=object)
No description has been provided for this image
In [60]:
# Histograms (15 bins each) of every numeric column in the 2019 data, laid out on a 2x4 grid.
num_features19 = df19.select_dtypes(include='number').columns
df19.loc[:, num_features19].hist(layout=(2, 4), figsize=(15, 6), bins=15)
Out[60]:
array([[<Axes: title={'center': 'Overall rank'}>,
        <Axes: title={'center': 'Score'}>,
        <Axes: title={'center': 'GDP per capita'}>,
        <Axes: title={'center': 'Social support'}>],
       [<Axes: title={'center': 'Healthy life expectancy'}>,
        <Axes: title={'center': 'Freedom to make life choices'}>,
        <Axes: title={'center': 'Generosity'}>,
        <Axes: title={'center': 'Perceptions of corruption'}>]],
      dtype=object)
No description has been provided for this image

Scatter Plots¶

In [6]:
# Scatter plots of the 2015 happiness score against each explanatory factor,
# coloured by region. One figure is drawn per (column, label) pair; the label
# is used both for the y-axis and in the title.
factors_2015 = [
    ('Economy (GDP per Capita)', 'GDP'),
    ('Family', 'Family'),
    ('Health (Life Expectancy)', 'Life Expectancy'),
    ('Freedom', 'Freedom'),
    ('Trust (Government Corruption)', 'Corruption'),
]

for factor_column, factor_label in factors_2015:
    # Keep the Axes handle so every call below targets this figure explicitly.
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.scatterplot(
        x=df15['Happiness Score'],
        y=df15[factor_column],
        hue=df15['Region'],
        s=200,
        ax=ax,
    )
    ax.legend(loc='upper left', fontsize=10)
    ax.set_title(f'Plot between Happiness score and {factor_label},2015')
    ax.set_xlabel('Happiness Score')
    ax.set_ylabel(factor_label)
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [75]:
# Happiness score vs. GDP per capita for 2016, coloured by region.
# The Axes handle is kept so labels and legend are set on this figure explicitly.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=df16['Happiness Score'],
    y=df16['Economy (GDP per Capita)'],
    hue=df16['Region'],
    s=200,
    ax=ax,
)
ax.legend(loc='upper left', fontsize=8)
ax.set_title('Plot between Happiness score and GDP,2016')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
No description has been provided for this image
In [93]:
# Happiness score vs. GDP per capita for 2017, coloured by region.
# 'Region' is missing for some 2017 rows; those are labelled 'Unknown' so the
# countries still appear in the plot instead of being left out with their NaN hue.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=df17['Happiness.Score'],
    y=df17['Economy..GDP.per.Capita.'],
    hue=df17['Region'].fillna('Unknown'),
    s=200,
    ax=ax,
)
ax.legend(loc='upper left', fontsize=8)
ax.set_title('Plot between Happiness score and GDP,2017')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
No description has been provided for this image
In [77]:
# Happiness score vs. GDP per capita for 2018, coloured by region.
# The 2018 file has no 'Region' column, and colouring by country name gives one
# legend entry per country. Regions are looked up from the 2015 table instead;
# countries not present there are labelled 'Unknown'.
region_by_country = df15.drop_duplicates('Country').set_index('Country')['Region']
region_2018 = (
    df18['Country or region']
    .map(region_by_country)
    .fillna('Unknown')
    .rename('Region')
)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=df18['Score'], y=df18['GDP per capita'], hue=region_2018, s=200, ax=ax)
ax.legend(loc='upper left', fontsize=8)
ax.set_title('Plot between Happiness score and GDP,2018')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
No description has been provided for this image
In [79]:
# Happiness score vs. GDP per capita for 2019, coloured by region.
# The 2019 file has no 'Region' column, and colouring by country name gives one
# legend entry per country. Regions are looked up from the 2015 table instead;
# countries not present there are labelled 'Unknown'.
region_by_country = df15.drop_duplicates('Country').set_index('Country')['Region']
region_2019 = (
    df19['Country or region']
    .map(region_by_country)
    .fillna('Unknown')
    .rename('Region')
)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=df19['Score'], y=df19['GDP per capita'], hue=region_2019, s=200, ax=ax)
ax.legend(loc='upper left', fontsize=8)
ax.set_title('Plot between Happiness score and GDP,2019')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
No description has been provided for this image

Correlation Map¶

In [50]:
def plot_correlation_map(numeric_df, year):
    """Draw a Pearson-correlation heatmap for one year's numeric columns.

    Parameters
    ----------
    numeric_df : pandas.DataFrame
        Frame containing only numeric columns.
    year : int
        Year shown in the figure title.

    Returns
    -------
    pandas.DataFrame
        The correlation matrix that was plotted.
    """
    corr = numeric_df.corr(method='pearson')
    fig, ax = plt.subplots(figsize=(12, 8))
    # Tick labels come from this year's own matrix; the column sets differ between years.
    sns.heatmap(
        corr,
        cmap="Reds",
        xticklabels=corr.columns,
        yticklabels=corr.columns,
        square=False,
        ax=ax,
    )
    ax.set_title(f'{year},Correlation Map')
    plt.show()
    return corr


numeric_df15 = df15.select_dtypes(include=[np.number])
cor15 = plot_correlation_map(numeric_df15, 2015)

numeric_df16 = df16.select_dtypes(include=[np.number])
cor16 = plot_correlation_map(numeric_df16, 2016)

numeric_df17 = df17.select_dtypes(include=[np.number])
cor17 = plot_correlation_map(numeric_df17, 2017)

numeric_df18 = df18.select_dtypes(include=[np.number])
cor18 = plot_correlation_map(numeric_df18, 2018)

numeric_df19 = df19.select_dtypes(include=[np.number])
cor19 = plot_correlation_map(numeric_df19, 2019)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Darker red squares indicate a stronger positive correlation between the two variables. Lighter squares indicate a weaker correlation, and the lightest squares a negative one¶

Pair Plots¶

In [20]:
# Pairwise relationships between the numeric columns of the 2015 data, coloured by region.
# 'Region' is categorical. Using the continuous 'Happiness Score' as hue would create
# one colour level per distinct score and drop the score itself from the plotted grid.
sns.pairplot(df15, hue='Region')
plt.suptitle('Pair Plot of Happiness Score,2015', y=1.02)  # y > 1 places the title above the grid
plt.show()
No description has been provided for this image
In [19]:
# Pairwise relationships between the numeric columns of the 2016 data, coloured by region.
# 'Region' is categorical. Using the continuous 'Happiness Score' as hue would create
# one colour level per distinct score and drop the score itself from the plotted grid.
sns.pairplot(df16, hue='Region')
plt.suptitle('Pair Plot of Happiness Score,2016', y=1.02)  # y > 1 places the title above the grid
plt.show()
No description has been provided for this image
In [21]:
# Pairwise relationships between the numeric columns of the 2017 data, coloured by region.
# 'Region' is categorical. Using the continuous 'Happiness.Score' as hue would create
# one colour level per distinct score and drop the score itself from the plotted grid.
# Missing regions are labelled 'Unknown' so those countries are still drawn.
sns.pairplot(df17.assign(Region=df17['Region'].fillna('Unknown')), hue='Region')
plt.suptitle('Pair Plot of Happiness Score,2017', y=1.02)  # y > 1 places the title above the grid
plt.show()
No description has been provided for this image

Facet Grids¶

In [ ]:
# Facet grid for 2015: one GDP-vs-happiness scatter panel per region.
# Facets must be categorical. Faceting on the continuous score and GDP columns
# would request one panel per distinct value pair, i.e. thousands of axes.
g = sns.FacetGrid(df15, col='Region', col_wrap=4)
g.map_dataframe(sns.scatterplot, x="Happiness Score", y="Economy (GDP per Capita)")
g.set_axis_labels("Happiness Score", "GDP")

plt.suptitle('2015', y=1.02)  # y > 1 places the title above the grid
plt.show()

Choropleth map¶

In [97]:
pip install plotly
Requirement already satisfied: plotly in c:\users\san\anaconda3\lib\site-packages (5.22.0)Note: you may need to restart the kernel to use updated packages.

Requirement already satisfied: tenacity>=6.2.0 in c:\users\san\anaconda3\lib\site-packages (from plotly) (8.2.2)
Requirement already satisfied: packaging in c:\users\san\anaconda3\lib\site-packages (from plotly) (23.2)
In [114]:
import plotly.express as px

# World map coloured by the 2019 happiness score.
# The labels mapping is keyed by the actual column name ('Score') so the colour bar
# and hover text read 'Happiness Score'.
fig = px.choropleth(
    data_frame=df19,
    locations='Country or region',
    locationmode='country names',
    color='Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2019',
    labels={'Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Score']
)

# Show the plot
fig.show()
In [129]:
# World map coloured by the 2015 happiness score; countries are matched by name.
fig = px.choropleth(
    data_frame=df15,
    title='Global Happiness Scores,2015',
    locations='Country',
    locationmode='country names',
    projection='natural earth',
    color='Happiness Score',
    color_continuous_scale='Viridis',
    hover_data=['Happiness Score'],
    labels={'Happiness Score': 'Happiness Score'},
)

# Render the interactive figure.
fig.show()
In [131]:
# World map coloured by the 2016 happiness score; countries are matched by name.
fig = px.choropleth(
    data_frame=df16,
    title='Global Happiness Scores,2016',
    locations='Country',
    locationmode='country names',
    projection='natural earth',
    color='Happiness Score',
    color_continuous_scale='Viridis',
    hover_data=['Happiness Score'],
    labels={'Happiness Score': 'Happiness Score'},
)

# Render the interactive figure.
fig.show()
In [133]:
# World map coloured by the 2017 happiness score.
# The labels mapping is keyed by the actual column name ('Happiness.Score') so the
# colour bar and hover text read 'Happiness Score'.
fig = px.choropleth(
    df17,
    locations='Country',
    locationmode='country names',
    color='Happiness.Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2017',
    labels={'Happiness.Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Happiness.Score']
)

# Show the plot
fig.show()
In [135]:
# World map coloured by the 2018 happiness score.
# The labels mapping is keyed by the actual column name ('Score') so the colour bar
# and hover text read 'Happiness Score'.
fig = px.choropleth(
    df18,
    locations='Country or region',
    locationmode='country names',
    color='Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2018',
    labels={'Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Score']
)

# Show the plot
fig.show()

Happiness score in different Regions¶

In [11]:
# Mean 2015 happiness score per region, as a one-column DataFrame indexed by region.
Happiness15 = df15.groupby('Region').agg({'Happiness Score': 'mean'})
Happiness15
Out[11]:
Happiness Score
Region
Australia and New Zealand 7.285000
Central and Eastern Europe 5.332931
Eastern Asia 5.626167
Latin America and Caribbean 6.144682
Middle East and Northern Africa 5.406900
North America 7.273000
Southeastern Asia 5.317444
Southern Asia 4.580857
Sub-Saharan Africa 4.202800
Western Europe 6.689619
In [12]:
# Mean 2016 happiness score per region, as a one-column DataFrame indexed by region.
Happiness16 = df16.groupby('Region').agg({'Happiness Score': 'mean'})
Happiness16
Out[12]:
Happiness Score
Region
Australia and New Zealand 7.323500
Central and Eastern Europe 5.370690
Eastern Asia 5.624167
Latin America and Caribbean 6.101750
Middle East and Northern Africa 5.386053
North America 7.254000
Southeastern Asia 5.338889
Southern Asia 4.563286
Sub-Saharan Africa 4.136421
Western Europe 6.685667
In [13]:
# Mean 2017 happiness score per region, as a one-column DataFrame indexed by region.
# Rows whose 'Region' is missing are not part of any group.
Happiness17 = df17.groupby('Region').agg({'Happiness.Score': 'mean'})
Happiness17
Out[13]:
Happiness.Score
Region
Australia and New Zealand 7.299000
Central and Eastern Europe 5.481864
Eastern Asia 5.863250
Latin America and Caribbean 5.980895
Middle East and Northern Africa 5.459133
North America 7.154500
Southeastern Asia 5.807667
Southern Asia 4.628429
Sub-Saharan Africa 4.102286
Western Europe 7.095600
In [14]:
# Compare the regional mean happiness scores for each year.
# Horizontal bars are used instead of pie slices: the values are averages, so their
# share of the summed averages (what a pie's percentages show) has no meaning.
regional_means_by_year = [
    (2015, Happiness15),
    (2016, Happiness16),
    (2017, Happiness17),
]

for year, regional_means in regional_means_by_year:
    fig, ax = plt.subplots(figsize=(10, 6))
    # Each table has a single score column (its name differs in 2017), so select it by position.
    regional_means.iloc[:, 0].sort_values().plot(kind='barh', ax=ax)
    ax.set_title(f'Happiness Score {year}')
    ax.set_xlabel('Mean Happiness Score')
    ax.set_ylabel('')  # region names on the ticks are self-explanatory
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]: